library(tidyverse)
library(ggplot2)
library(here)
library(plotly)
library(readxl)
# Virginia Quality Birth to Five System (VQB5) Evaluation & Analysis

# Import data and packages ----

# Load the 2023-2024 VQB5 quality rating workbook. The path is relative, so it
# is resolved against the current working directory.
vqb5 <- read_excel("VQB5-Quality-Rating-Data-for-Website---2023-2024.xlsx")

# Data cleaning ----
vqb5_cleaned <- vqb5 |>
  # Keep only the variables relevant to quality ratings and results.
  select(
    `Ready Region Name`,
    `Site Type`,
    `Site Name`,
    `Other Care Options`,
    `Quality Rating`,
    `Interaction Points`,
    `Curriculum Points`,
    `Total Points`,
    `Operating Period`
  ) |>
  # Convert every column name to lower case and replace spaces with
  # underscores (e.g. `Site Type` becomes `site_type`) so the columns can be
  # referenced without backticks.
  rename_with(\(nm) str_replace_all(tolower(nm), " ", "_"))

# 1. Summarize the number of sites in each quality result category ----
# Count the sites in each quality rating and express every count as a
# percentage of all sites, rounded to two decimals.
quality_counts <- vqb5_cleaned |>
  count(quality_rating, name = "site_count") |>
  mutate(percent = round(100 * site_count / sum(site_count), 2))

quality_counts
#> # A tibble: 3 × 3
#>   quality_rating       site_count percent
#>   <chr>                     <int>   <dbl>
#> 1 Exceeds Expectations         79    2.53
#> 2 Meets Expectations         2993   95.8
#> 3 Needs Support                51    1.63
# 2. Summarize quality ratings by site type ----

# Count sites for every site type / quality rating combination, then compute
# each rating's share within its own site type (rounded to two decimals).
quality_by_site_type <- vqb5_cleaned |>
  count(site_type, quality_rating, name = "site_count") |>
  group_by(site_type) |>
  mutate(
    percent = round(100 * site_count / sum(site_count), 2),
    # Fix the level order so the ratings always appear from best to worst.
    quality_rating = factor(
      quality_rating,
      levels = c("Exceeds Expectations", "Meets Expectations", "Needs Support")
    )
  ) |>
  # Drop the grouping so later verbs are not silently applied per site type.
  ungroup()

quality_by_site_type
#> # A tibble: 8 × 4
#>   site_type       quality_rating       site_count percent
#>   <chr>           <fct>                     <int>   <dbl>
#> 1 Center          Exceeds Expectations         19    1.25
#> 2 Center          Meets Expectations         1483   98.0
#> 3 Center          Needs Support                12    0.79
#> 4 Family Day Home Exceeds Expectations         28    3.71
#> 5 Family Day Home Meets Expectations          687   91.1
#> 6 Family Day Home Needs Support                39    5.17
#> 7 Public School   Exceeds Expectations         32    3.74
#> 8 Public School   Meets Expectations          823   96.3
# Bar chart of site counts per quality rating, one facet per site type, with
# custom hover text for the interactive version.
p1 <- quality_by_site_type |>
  ggplot(aes(
    x = quality_rating,
    y = site_count,
    fill = quality_rating,
    # `text` is not drawn by ggplot2; it is carried along so ggplotly() can
    # use it as the tooltip.
    text = paste0("Sites: ", site_count, "\nPercent: ", percent, "%")
  )) +
  geom_col() +
  facet_wrap(~ site_type) +
  scale_fill_manual(values = c(
    "Exceeds Expectations" = "#1b9e77",
    "Meets Expectations" = "#7570b3",
    "Needs Support" = "#d95f02"
  )) +
  labs(
    title = "Quality Ratings by Site Type",
    x = NULL,
    y = "Number of Sites",
    fill = "Quality Rating"
  ) +
  theme_minimal() +
  # Hide the x-axis labels, ticks and vertical grid lines; the fill legend
  # already identifies each rating.
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Convert to an interactive plotly widget that shows only the custom text
# (site count and percent) on hover.
ggplotly(p1, tooltip = "text", width = 1000, height = 500)

# 3. Summarize quality ratings by ready region ----
# Count sites for every ready region / quality rating combination, then
# compute each rating's share within its own region (rounded to two decimals).
# Note: `region_count` holds the number of sites, not the number of regions.
quality_by_ready_region <- vqb5_cleaned |>
  count(ready_region_name, quality_rating, name = "region_count") |>
  group_by(ready_region_name) |>
  mutate(
    percent = round(100 * region_count / sum(region_count), 2),
    # Fix the level order so the ratings always appear from best to worst.
    quality_rating = factor(
      quality_rating,
      levels = c("Exceeds Expectations", "Meets Expectations", "Needs Support")
    )
  ) |>
  # Drop the grouping so later verbs are not silently applied per region.
  ungroup()

quality_by_ready_region
#> # A tibble: 24 × 4
#>    ready_region_name          quality_rating       region_count percent
#>    <chr>                      <fct>                       <int>   <dbl>
#>  1 Ready Region 1 - Southwest Exceeds Expectations            7    3.26
#>  2 Ready Region 1 - Southwest Meets Expectations            208   96.7
#>  3 Ready Region 2 - West      Exceeds Expectations            2    1.12
#>  4 Ready Region 2 - West      Meets Expectations            175   97.8
#>  5 Ready Region 2 - West      Needs Support                   2    1.12
#>  6 Ready Region 3 - Southside Exceeds Expectations            8    5.3
#>  7 Ready Region 3 - Southside Meets Expectations            143   94.7
#>  8 Ready Region 4 - Central   Exceeds Expectations            6    1.45
#>  9 Ready Region 4 - Central   Meets Expectations            402   97.1
#> 10 Ready Region 4 - Central   Needs Support                   6    1.45
#> # ℹ 14 more rows
# Faceted bar chart: number of sites per quality rating, one panel per ready
# region. The `text` mapping supplies the hover label used by ggplotly().
p2 <- ggplot(
  quality_by_ready_region,
  aes(
    x = quality_rating,
    y = region_count,
    fill = quality_rating,
    text = paste0("Sites: ", region_count, "\nPercent: ", percent, "%")
  )
) +
  geom_col() +
  facet_wrap(~ ready_region_name) +
  scale_fill_manual(
    values = c(
      "Exceeds Expectations" = "#1b9e77",
      "Meets Expectations" = "#7570b3",
      "Needs Support" = "#d95f02"
    )
  ) +
  labs(
    title = "Quality Ratings by Ready Region",
    fill = "Quality Rating",
    x = NULL,
    y = "Number of Sites"
  ) +
  theme_minimal() +
  # Strip x-axis labels, ticks and vertical grid lines from every panel.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major.x = element_blank()
  )

# Render as an interactive plotly widget, showing only the custom hover text.
ggplotly(p2, tooltip = "text", width = 1000, height = 600)